require(magrittr)
require(knitr)
require(dplyr)
require(tidyr)
require(tibble)
require(ggplot2)
require(Seurat)
require(cowplot)
require(kableExtra)
require(readr)
require(readxl)
source("seurat_helper_functions.R")
parent.directory <- "intermediate/"
parent.directory.mtx <- "filtered/"
We take our GC and antibody-secreting cell (ASC) identifiers from our orignal Seurat dataset. We then get the information from the original 10X files, only taking the cell identifiers from the GC and ASCs.
load(file = paste0(parent.directory, "sct_cluster_metadata.RData"))
GCPC_metadata %>%
filter(parent_cluster != "B PLCG2") -> GCPC_metadata
list_identifiers <- c("TC124.1", "TC124.2", "TC124.3", "TC125", "TC126")
cell_barcodes_list <- lapply(1:length(list_identifiers), function(i){
GCPC_metadata %>%
filter(orig_ident == list_identifiers[i]) %>%
select(identifier) %>%
mutate(raw_barcode = sub(paste0(list_identifiers[i], "_"), "", identifier)) %>%
pull(raw_barcode) -> raw_barcodes
return(raw_barcodes)
})
TC124.1.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit1"), strip.suffix = TRUE)
TC124.2.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit7"), strip.suffix = TRUE)
TC124.3.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit8"), strip.suffix = TRUE)
TC125.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit2"), strip.suffix = TRUE)
TC126.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit3"), strip.suffix = TRUE)
B.names <- c("TC124.1", "TC124.2", "TC124.3", "TC125", "TC126")
B.list <- list(TC124.1.data, TC124.2.data, TC124.3.data, TC125.data, TC126.data)
rm(list = ls()[grepl(".data", ls(), fixed = TRUE)])
B.list <- lapply(1:length(B.list), function(i){
B.list[[i]] <- B.list[[i]][,cell_barcodes_list[[i]]]
colnames(B.list[[i]]) <- paste0(B.names[i], "_", colnames(B.list[[i]]))
return(CreateSeuratObject(B.list[[i]], project = B.names[[i]], min.cells = 3, min.features = 200))
})
Now that we have our new Seurat objects, we will add metadata and then will re-perform our SCTransform normalization and the Seurat v3 integration algorithm.
read_excel("dissociation_genes.xlsx") %>% select('Human ID') %>% na.omit %>% pull("Human ID") -> dissociation.genes
for(i in 1:length(B.list)){
temp.dissoc.genes <- dissociation.genes[dissociation.genes %in% rownames(B.list[[i]])]
B.list[[i]][["percent.dissoc"]] <- PercentageFeatureSet(B.list[[i]], features = temp.dissoc.genes)
B.list[[i]][["percent.mt"]] <- PercentageFeatureSet(B.list[[i]], pattern = "^MT-")
B.list[[i]][["donor"]] <- substring(B.list[[i]]$orig.ident, 1, 5)
}
#This removes the unused data thus far
rm(i, temp.dissoc.genes)
B.list <- list(merge(x = B.list[[1]], y = B.list[2:3]), B.list[[4]], B.list[[5]])
B.list <- lapply(1:length(B.list), function(i){
return(SCTransform(B.list[[i]], verbose = TRUE))
})
B.features <- SelectIntegrationFeatures(object.list = B.list, nfeatures = 3000)
B.list <- PrepSCTIntegration(object.list = B.list, anchor.features = B.features, verbose = TRUE)
B.anchors <- FindIntegrationAnchors(object.list = B.list, normalization.method = "SCT", anchor.features = B.features, dims = 1:40, verbose = TRUE)
B.gcpc <- IntegrateData(anchorset = B.anchors, normalization.method = "SCT", dims = 1:40, verbose = TRUE)
save(B.gcpc, GCPC_metadata, file = paste0(parent.directory, "sct_gcpc_integrated.RData"))
We perform PCA and UMAP and Louvain clustering just as before. We will also perform Louvain clustering at a single resolution, followed by scoring of specific gene sets just as in the original Seurat dataset.
load(file = paste0(parent.directory, "sct_gcpc_integrated.RData"))
B.gcpc[["SCT"]] <- NULL
IG.genes <- grep("^IGL|^IGK|^IGHV", B.gcpc@assays$integrated@var.features, value = TRUE)
B.gcpc@assays$integrated@var.features <- setdiff(B.gcpc@assays$integrated@var.features, c(IG.genes))
B.gcpc <- RunPCA(B.gcpc, npcs = 50, verbose = TRUE)
dims.to.use <- 1:30
B.gcpc <- BuildAnnoyUMAP(B.gcpc,
reduction = "pca",
metric = "euclidean",
dims = dims.to.use,
reduction_name = "pca_UMAP",
reduction_key = "pcaumap_",
return_graphs = TRUE)
DefaultAssay(B.gcpc) <- "RNA"
B.gcpc <- NormalizeData(B.gcpc)
B.gcpc <- FindClusters(B.gcpc, resolution = 0.7, graph.name = "annoy_nn_snn")
B.gcpc$previous_idents <- plyr::mapvalues(rownames(B.gcpc@meta.data), from = GCPC_metadata$identifier, to = as.character(GCPC_metadata$parent_cluster))
save(B.gcpc, file = paste0(parent.directory,"sct_seurat_gcpc_clustered.RData"))
We use the Seurat FindAllMarkers function and SoupX to find two sets of marker genes for our new clusters.
GCPC_RNA_markers <- FindAllMarkers(B.gcpc, assay = "RNA", logfc.threshold = 0.3, test.use = "LR", verbose = TRUE, only.pos = FALSE, latent.vars = "donor")
GCPC_SoupX_markers <- SoupX::quickMarkers(B.gcpc[["RNA"]]@data, B.gcpc$seurat_clusters, N = 20)
save(GCPC_RNA_markers, GCPC_SoupX_markers, file = paste0(parent.directory, "sct_seurat_gcpc_markers.RData"))
We can now visualize the new clusters and the previous clusters.
load(paste0(parent.directory,"sct_seurat_gcpc_clustered.RData"))
load(paste0(parent.directory,"sct_seurat_gcpc_markers.RData"))
DimPlot(B.gcpc, reduction = "pca_UMAP", label = TRUE, label.size = 5, label.box = TRUE, pt.size = 0.8, group.by = "previous_idents")
DimPlot(B.gcpc, reduction = "pca_UMAP", label = TRUE, label.size = 5, label.box = TRUE, pt.size = 0.8)
We will visualize the SoupX markers here.
cluster_list <- sort(unique(B.gcpc$seurat_clusters))
n_clusters <- length(cluster_list)
mrks <- SoupX::quickMarkers(B.gcpc[["RNA"]]@data, B.gcpc$seurat_clusters, N = 20)
mrks_list <- lapply(cluster_list, function(i){
mrks %>%
dplyr::filter(cluster == i) %>%
dplyr::pull(gene)
})
dotplot_list <- lapply(1:n_clusters, function(i) {
cluster_name <- cluster_list[i]
temp.slot <- paste0("Cluster ", cluster_name)
if(length(mrks_list[[{{i}}]]) == 0){return(NULL)}
src <- c("#### {{temp.slot}} {.unnumbered}",
"```{r soupdotplot-{{i}}, fig.width = 12, fig.height = 6}",
"DotPlot(B.gcpc, features = mrks_list[[{{i}}]], assay = \"RNA\")+RotatedAxis()",
"```",
"")
knit_expand(text = src)
})
out <- knit_child(text = unlist(dotplot_list), options = list(echo = FALSE, cache = FALSE))
DefaultAssay(B.gcpc) <- "RNA"
cluster_list <- levels(GCPC_RNA_markers$cluster)
n_clusters <- length(cluster_list)
gene_list <- lapply(cluster_list, function(i){
GCPC_RNA_markers %>%
dplyr::filter(p_val_adj < 0.01) %>%
dplyr::filter(cluster == i) %>%
dplyr::top_n(20, wt = avg_log2FC) %>%
dplyr::pull(gene)
})
dotplot_list <- lapply(1:n_clusters, function(i) {
cluster_name <- cluster_list[i]
temp.slot <- paste0("Cluster ", cluster_name)
if(length(gene_list[[i]]) == 0 ){
return(NULL)
}
src <- c("#### {{temp.slot}} {.unnumbered}",
"```{r dotplot-{{i}}, fig.width = 12, fig.height = 6}",
"DotPlot(B.gcpc, features = gene_list[[{{i}}]])+RotatedAxis()+scale_color_viridis_c(direction = -1, option = \"A\")",
"```",
"")
knit_expand(text = src)
})
out <- knit_child(text = unlist(dotplot_list), options = list(echo = FALSE, cache = FALSE))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
We include tables of the marker genes found in each cluster.
cluster_vector <- levels(B.gcpc@active.ident)
n_clusters <- length(cluster_vector)
markers.list <- lapply(1:n_clusters, function(i) {
GCPC_RNA_markers %>%
dplyr::filter(cluster == cluster_vector[i] & p_val_adj < 0.01) %>%
dplyr::arrange(-avg_log2FC) %>%
dplyr::select(cluster, gene, avg_log2FC, p_val, p_val_adj) %>%
dplyr::mutate_if(is.numeric, format, digits = 3)
})
names(markers.list) <- paste("Cluster", cluster_vector)
markers.list <- c(markers.list, "ALL" = list(GCPC_RNA_markers %>% dplyr::arrange(-avg_log2FC) %>% select(cluster, gene, avg_log2FC, p_val, p_val_adj) %>% mutate_if(is.numeric, format, digits = 3)))
src_list <- lapply(1:(n_clusters+1), function(i) {
if(i == (n_clusters+1)){
label <- "ALL"
} else {
label <- cluster_vector[i]
}
src <- c("#### {{label}} {.unnumbered} ",
"<div style = \"width:65%; height:auto; align:left\">",
"```{r marker-cluster-{{label}}, fig.width=3}",
"DT::datatable(markers.list[[{{i}}]], rownames = FALSE)",
"```",
"</div>",
"")
knit_expand(text = src)
})
out <- knit_child(text = unlist(src_list), options = list(echo = FALSE, cache = FALSE))
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
The cluster identities remain largely the same as before.
#load(file = paste0(parent.directory, "sct_seurat_gcpc_clustered.RData"))
cluster_levels <- paste0("C",c(1,8,4,12,7,6,2,10,9,5,3,11,0))
cluster_names <- c("GC 1",
"GC 2",
"GC 3",
"GC IgA",
"LZ",
"DZ 1",
"DZ 2",
"DZ 3",
"DZ 4",
"DZ 5",
"GC LMO2",
"ASC IgM",
"ASC IgG")
B.gcpc$new_clusters_nums <- factor(paste0("C", as.character(B.gcpc$annoy_nn_snn_res.0.7)),
levels = cluster_levels)
B.gcpc$cluster_names <- plyr::mapvalues(B.gcpc$new_clusters_nums,
from = cluster_levels,
to = cluster_names)
Idents(B.gcpc) <- "cluster_names"
DimPlot(B.gcpc, label = TRUE, label.box = TRUE, repel = TRUE, cols = c("dodgerblue", "forestgreen", RColorBrewer::brewer.pal(11, "Set3")))+NoLegend()
save(B.gcpc, file = paste0(parent.directory, "sct_seurat_gcpc_named_clusters.RData"))
load(file = paste0(parent.directory, "sct_seurat_gcpc_named_clusters.RData"))
DimPlot(B.gcpc, label = TRUE, label.box = TRUE, repel = TRUE, cols = c("dodgerblue", "forestgreen", RColorBrewer::brewer.pal(11, "Set3")))+NoLegend()